package cc.factorie.app.bib.parser

/**
 * @author Luke Vilnis
 * @date 5/10/2012
 */

private[parser] object AST {
  // these types all have very generic names, so wrap them in an "AST" prefix
  sealed trait Entry
  sealed trait Value

  final case class Document(entries: List[Entry])

  final case class StringEntry(abbrev: String, value: Value) extends Entry
  final case class PreambleEntry(content: Value) extends Entry
  final case class CommentEntry(comment: String) extends Entry
  final case class RegularEntry(
    ty: String, citationKey: String, tags: List[(String, Value)]) extends Entry

  final case class Literal(content: String) extends Value
  final case class Abbrev(name: String) extends Value
  final case class Concat(left: Value, right: Value) extends Value

}

private[parser] object DocumentParser {

  import AST._

  // this should return an Either or a custom error object instead of a useless "None"
  def parseString(input: String): Either[String, Document] = {
    val res = Impl.parseAll(Impl.bibTex, input)
    res.map(r => Right(Document(r))).getOrElse(Left(res.toString))
  }

  object Impl extends SharedParsers {

    def bibTex =
      (freeComment ~! anyEntry ~! freeComment).+ ^^
      (_ flatMap { case x ~ y ~ z => List(x, y, z): List[Entry] })

    // FIXME: lines starting with %%% are comments
    def freeComment = (/*"(%%[^\r\n]*)+" |*/ "[^@]*") ^^ (CommentEntry(_))

    def anyEntry = AT ~> (commentEntry | stringEntry | preambleEntry | regularEntry)

    def commentEntry =
      COMMENT ~> (WS ~> (('{' ~> "[^}]*" <~ '}') | ('(' ~> "[^)]*" <~ ')')) | "[^@\r\n]*") ^^
      (CommentEntry(_))

    def stringEntry = STRING ~> WS ~> entryBody { tag } ^^ (StringEntry(_, _)).tupled

    def preambleEntry = PREAMBLE ~> WS ~> entryBody { value } ^^ (PreambleEntry(_))

    def regularEntry =
      (SYMBOL <~ WS) ~ entryBody { SYMBOL_CAN_START_WITH_NUMBER ~ rep((COMMA_WS | WS) ~> tag) <~ (COMMA_WS ?) } ^^ {
        case ty ~ (key ~ tags) => RegularEntry(ty, key, tags)
      }

    def entryBody[T](parser: => Parser[T]): Parser[T] = {
      lazy val p = parser
      ("\\{\\s*" ~> p <~ "\\s*\\}") |
      ("\\(\\s*" ~> p <~ "\\s*\\)")
    }

    def tag = (SYMBOL <~ "\\s*=\\s*") ~ value ^^ {
      case sym ~ value => (sym, value)
    }

    def value: Parser[Value] = literalOrSymbol ~ ("\\s*#\\s*" ~> value).? ^^ {
      case left ~ Some(right) => Concat(left, right)
      case left ~ _ => left
    }

    def literalOrSymbol = (SYMBOL ^^ (Abbrev(_))) | literal

    def literal = (numericLiteral | braceDelimitedNoOuterLiteral | quoteDelimitedLiteral)

    def numericLiteral = "\\d+(\\.\\d+)?" ^^ (Literal(_))
    def quoteDelimitedLiteral =
      '"' ~> (BRACE_DELIMITED_STRING | """\\.""" | """[^"]""").* <~ '"' ^^ (xs => Literal(xs.mkString))
    def braceDelimitedNoOuterLiteral = BRACE_DELIMITED_STRING_NO_OUTER ^^ (Literal(_))

    def AT = c('@')
    def COMMA_WS = r("\\s*,\\s*")
    def COMMENT = r("(c|C)(o|O)(m|M)(m|M)(e|E)(n|N)(t|T)")
    def STRING = r("(s|S)(t|T)(r|R)(i|I)(n|N)(g|G)")
    def PREAMBLE = r("(p|P)(r|R)(e|E)(a|A)(m|M)(b|B)(l|L)(e|E)")
    // anything except can't start with number, quotes, braces/parens, '#', commas, whitespace, or '='
    def SYMBOL = r("[^0-9\"}{)(,\\s#=][^\"}{)(,\\s#=]*")
    // can start with (or even be entirely) a number
    def SYMBOL_CAN_START_WITH_NUMBER = r("[^\"}{)(,\\s#=][^\"}{)(,\\s#=]*")
  }
}
